library(ggplot2)
library(dplyr)
library(xtable)
library(readr)


# setwd 

plot_data <- read_table("figure1_data.txt")

est_mod1 <- as.vector(plot_data[seq(1,13,2),2])
est_mod2 <- as.vector(plot_data[seq(1,13,2),3])
est_mod3 <- as.vector(plot_data[seq(1,13,2),4])
est_mod4 <- as.vector(plot_data[seq(1,13,2),5])

se_mod1 <- plot_data[seq(2,14,2),2]
se_mod2 <- plot_data[seq(2,14,2),3]
se_mod3 <- plot_data[seq(2,14,2),4]
se_mod4 <- plot_data[seq(2,14,2),5]

plot_data <- data_frame(est = c(est_mod4$X5, est_mod1$X2, est_mod2$X3, est_mod3$X4),
                        se  = c(se_mod4$X5, se_mod1$X2,  se_mod2$X3,  se_mod3$X4),
                        shape = c(rep("a", 7), rep("b", 21)),
                        lab = factor(rep(c("Population \n Validated turnout",
                                           "\nDNES sample frame \nValidated turnout", 
                                           "\nDNES respondents \nValidated turnout", 
                                           "\nDNES respondents \nSelf-reported turnout"), each = 7)), 
                        pla = c(seq(1.45, 7.45, 1), seq(1.2, 7.2, 1), seq(1, 7, 1), seq(0.8, 6.8, 1)))


plot_data <- 
  plot_data %>%
  mutate(lab = factor(lab, levels(lab)[c(4,3,2,1)]),
         est = 100 * as.numeric(est),
         se  = 100 * as.numeric(se))
plot_data[c(1,8,15,22),1:2] <- plot_data[c(1,8,15,22),1:2]*10


## population comparison

ggplot(data = na.omit(plot_data),
       aes(x = est,
           y = pla, 
           xmax = qnorm(0.975)*se + est,
           xmin = qnorm(0.025)*se + est)) + 
  geom_errorbarh(aes(color = lab), height = 0) + 
  geom_point(aes(color = lab, shape = lab), size = 3) + 
  theme_classic() + 
  xlab("Difference in turnout") + 
  ylab("") +
  scale_y_continuous(breaks = 1:7,
                     labels = c("Age (10 years)",
                                "High School",
                                "Vocational training",
                                "Short/mid-cycle",
                                "Long-cycle",
                                "Female",
                                "Non-native")) +
  scale_color_manual(name = "",
                     values=c("black", "black", "grey50", "grey75"),
                     labels = c("Population \n Validated turnout",
                                "\nDNES sample frame \nValidated turnout", 
                                "\nDNES respondents \nValidated turnout", 
                                "\nDNES respondents \nSelf-reported turnout") ) +
  scale_shape_manual(name = "", 
                     values = c(1, 16, 16, 16),
                     labels = c("Population \n Validated turnout",
                                "\nDNES sample frame \nValidated turnout", 
                                "\nDNES respondents \nValidated turnout", 
                                "\nDNES respondents \nSelf-reported turnout")) +
  geom_vline(xintercept = 0, lty = 2, alpha = 0.3) + 
  theme(text = element_text(family = "sans"))

#check pop estimate is always in CI for sample frame estimate 
plot_data[1:7, 1] > plot_data[8:14, 1] + qnorm(0.025) * plot_data[8:14, 2] &
  plot_data[1:7, 1] < plot_data[8:14, 1] + qnorm(0.975) * plot_data[8:14, 2]

ggsave(plot = last_plot(), filename = "figure1.pdf", width = 8, height = 4, dpi = 300)

# model estimates in table ------------------------------------------------

res_tab <- 
  matrix(NA, ncol = 4, nrow = 16) 

ests <- 
  cbind(est_mod4, 
        est_mod1, 
        est_mod2, 
        est_mod3)

ses <- 
  cbind(se_mod4, 
        se_mod1, 
        se_mod2, 
        se_mod3)

ests <- 
  ests %>%
  mutate(X5 = as.numeric(X5),
         X2 = as.numeric(X2),
         X3 = as.numeric(X3),
         X4 = as.numeric(X4))
  
ses <- 
  ses %>%
  mutate(X5 = as.numeric(X5),
         X2 = as.numeric(X2),
         X3 = as.numeric(X3),
         X4 = as.numeric(X4))

row.names(res_tab) <- 
  c("Age (10 years)", 1,
    "Education", "(textit{Baseline = Elementary school})",
    "High School", 3,
    "Vocational training", 4,
    "Short/mid-cycle", "(textit{Up to a Bachelor's})",
    "Long-cycle", "(textit{A Master's or more})", 
    "Female", 7, 
    "Non-native", 8)

colnames(res_tab) <- 
  c("model 1", 
    "model 2", 
    "model 3", 
    "model 4")

for(i in 1:7){
  for(j in 1:4){
    res_tab[1 + 2 * c(0, 2:7)[i], j] <- round(100*ests[i, j], 2)
    res_tab[2 + 2 * c(0, 2:7)[i], j] <- 
      paste("(", round(100*ses[i, j], 2), ")", sep = "")
    if(i == 1){
      res_tab[1 + 2 * c(0, 2:7)[i], j] <- round(100*ests[i, j], 3)
      res_tab[2 + 2 * c(0, 2:7)[i], j] <- 
        paste("(", round(100*ses[i, j], 3), ")", sep = "")
      if(j == 1){
        res_tab[2 + 2 * c(0, 2:7)[i], j] <- 
          paste("(", round(100*ses[i, j], 4), ")", sep = "")
      }
    }
    
  }    
}

write_csv(as.data.frame(res_tab),  "TableS4.csv")


